
# A4a_price_index_agg_entry
#==============================================================================

# Description: This code computes the difference in the gains from trade between
# poor and rich on average on aggregate in the entry-only model

rm(list = ls())

library('data.table')
library('stargazer')
library('ggplot2')

setwd("D:/data_replication")


# Import and format list of products for each country
#===============================================================================

product_id <- fread("estimation/2_product_list/product_id_declarant.csv")

pc8plus_i_k <- fread("estimation/4_demand_estimation/pc8plus_i_k.csv")
names(pc8plus_i_k) <- c("product_id", "V1", "k")
pc8plus_i_k <- pc8plus_i_k[ , .(product_id, k)]
pc8plus_i_k[ , product_id := as.integer(product_id)]

k_to_i_k <- fread("estimation/5_supply_side/k_to_i_k.csv")

product_id <- merge(product_id, pc8plus_i_k, by = "product_id", all = T)
product_id <- product_id[k != 0]
product_id <- product_id[ , .(declarant, k)]

product_id <- merge(product_id, k_to_i_k, by = "k", all = T)
product_id <- product_id[ , .(i_k, declarant)]


load('robustness/firm_heterogeneity/data_PI_entry.RData')


data[ , N_new := N_dom_counter - N_dom_true]
data <- data[N_new != 30]
data <- data[N_new != 100]

data <- data[P_true_sorted_20 > 0.0000001]
data <- data[P_true_sorted_80 > 0.0000001]
data <- data[P_counter_sorted_20 > 0.0000001]
data <- data[P_counter_sorted_80 > 0.0000001]

# Create difference between poor and rich
#-------------------------------------------------------------------------------

data[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]

# Infer u/beta's

data[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

# Merge in Cobb-Douglas Weights
#-------------------------------------------------------------------------------

setnames(product_id, "declarant", "Declarant")
setnames(product_id, "i_k", "product")
product_id[ , index_i_k := 1]
data = merge(data, product_id, by = c("product", "Declarant"), all = T)
data <- data[index_i_k == 1]


w = read.csv("estimation/4_demand_estimation/4_cobb_douglas_weights/weights_i_k.csv")
colnames(w)[1:4] = c("product", "Year", "Quarter", "Declarant")
data = merge(data, w, by = c('product', 'Year', 'Quarter', 'Declarant'))
data = data[s_20_i_k != 0]
data = data[s_80_i_k != 0]


# Construct Price Index
#===============================================================================

# Remove extreme observations
#-------------------------------------------------------------------------------

data_PI = data
data_PI = data_PI[diff < 10 & diff > -10]
data_PI = data_PI[(abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]

data_PI[ , v_true_20 := beta_price_20 * log(P_true_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_counter_20 := beta_price_20 * log(P_counter_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_true_80 := beta_price_80 * log(P_true_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]
data_PI[ , v_counter_80 := beta_price_80 * log(P_counter_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]


load("estimation/5_supply_side/product_id.RData")
colnames(product_id) = c("product", "Declarant")
product_id[ , idx_PI := 1]
data_PI = merge(data_PI, product_id, by = c('product', 'Declarant'), all = T)
data_PI <- data_PI[is.na(Year) == F]
data_PI <- data_PI[idx_PI == 1]

data_PI <- data_PI[, .SD[((s_80_i_k < quantile(s_80_i_k, probs = 0.99)) & (s_20_i_k < quantile(s_20_i_k, probs = 0.99)))], by = 'Declarant']

data_PI[ , sum_w := sum(s_20_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_20_i_k := s_20_i_k / sum_w]

data_PI[ , sum_w := sum(s_80_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_80_i_k := s_80_i_k / sum_w]

#data_PI <- data_PI[ , obs := .N, by = c('Year', 'Quarter', 'Declarant')]
#data_PI <- data_PI[ , omega_equ := 1/obs]

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_20_i_k*log(s_20_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_true_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_true_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_counter_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_counter_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , diff := PI_counter_20 / PI_true_20 - PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , gft_20 := PI_counter_20 / PI_true_20]
data_PI <- data_PI[ , gft_80 := PI_counter_80 / PI_true_80]

#===============================================================================


# Run Regression, save, and plot
#===============================================================================

Reg_P_summary <- lm(diff ~ 0 + as.factor(Declarant), data = data_PI)
print(summary(Reg_P_summary))
reg_gft_20 <- lm(gft_20 ~ 0 + as.factor(Declarant), data = data_PI)
reg_gft_80 <- lm(gft_80 ~ 0 + as.factor(Declarant), data = data_PI)

# Format Data to Save Regression Output

declarant <- as.matrix(Reg_P_summary$coefficients)
declarant <- gsub("as.factor\\(Declarant\\)","",rownames(declarant))
declarant <- as.numeric(declarant)
income_data <- fread(file = "estimation/5_supply_side/gdp_per_capita_2007.csv")

# Save Regression

output <- merge(income_data, data.table(declarant, Reg_P_summary$coefficients), by = "declarant")
colnames(output) <- c(colnames(output)[1:4], "FE_declarant")
write.csv(file = "robustness/firm_heterogeneity/price_index/results/diff_PI_entry.csv", output)

output_gft_20 <- merge(income_data, data.table(declarant, reg_gft_20$coefficients), by = "declarant")
colnames(output_gft_20) <- c(colnames(output)[1:4], "gft_20")
output_gft_80 <- merge(income_data, data.table(declarant, reg_gft_80$coefficients), by = "declarant")
colnames(output_gft_80) <- c(colnames(output)[1:4], "gft_80")
output_gft_80$year = NULL
output_gft_80$country = NULL
output_gft_80$gdp_per_capita_declarant = NULL
output_gft <- merge(output_gft_20, output_gft_80, by = "declarant")
write.csv(file = "robustness/firm_heterogeneity/price_index/results/gft_PI_entry.csv", output_gft)


# Plot 1
#-------------------------------------------------------------------------------

rm(list = ls())

library("data.table")  
library("ggplot2")  
library("ggrepel")  
library("stargazer")  

output <- fread('robustness/firm_heterogeneity/price_index/results/diff_PI_entry.csv')

iso3 <- fread('estimation/5_supply_side/ISO3_declarant_codes.csv')

output[ , gdp_log := log(gdp_per_capita_declarant)]
setkey(output, declarant)
setkey(iso3, declarant)

output <- output[iso3]
output <- output[is.na(country) == F]
output = output[declarant != 18] 
output = output[declarant != 46] 
output = output[declarant != 600] 


reg <- lm(FE_declarant ~ gdp_log, data = output)
summary(reg)
cor(output$FE_declarant, output$gdp_log)

plot_diff_entry <- ggplot(output, aes(x=gdp_log, y=FE_declarant)) +
  geom_text_repel(label=output$ISO3, size = 5) +
  theme(text = element_text(size=17)) +
  geom_smooth(method=lm) +
  labs(x = "GDP per capita (in logs)", y = expression(paste(Delta, "Welfare"["Poor"]," - ", Delta, "Welfare"["Rich"]))) 

ggsave("robustness/firm_heterogeneity/price_index/results/plot_PI_entry.png", plot = plot_diff_entry)


# Plot 2
#-------------------------------------------------------------------------------

rm(list = ls())

library("data.table")  
library("ggplot2")  
library("ggrepel")  
library("stargazer")  

output <- fread('robustness/firm_heterogeneity/price_index/results/diff_PI_entry.csv')

iso3 <- fread('estimation/5_supply_side/ISO3_declarant_codes.csv')

imp_share = fread('robustness/firm_heterogeneity/import_share.csv')

output = merge(output, imp_share)

output[ , gdp_log := log(gdp_per_capita_declarant)]
setkey(output, declarant)
setkey(iso3, declarant)

output <- output[iso3]
output <- output[is.na(country) == F]
output = output[declarant != 18]
output = output[declarant != 600] 
output = output[declarant != 46] 

reg <- lm(FE_declarant ~ gdp_log, data = output)
summary(reg)
cor(output$FE_declarant, output$gdp_log)

plot_diff_full_adj <- ggplot(output, aes(x=imp_share, y=FE_declarant)) +
  geom_text_repel(label=output$ISO3, size = 5) +
  theme(text = element_text(size=17)) +
  geom_smooth(method=lm) +
  labs(x = "Avg. Import Share", y = expression(paste(Delta, "Welfare"["Poor"]," - ", Delta, "Welfare"["Rich"]))) 

ggsave("robustness/firm_heterogeneity/price_index/results/plot_size.png", plot = plot_diff_full_adj)



